<?php
// Each list page is fetched by shelling out to CasperJS with this command template:
//   LC_ALL=en_US.UTF-8 /usr/local/bin/casperjs downHtml.js '{$url}' '{$dir}'
include_once('../lib/config.php');
include_once('../lib/Db.class.php');
include_once('../lib/Model.class.php');
include_once('../lib/function.php');

// Number of the first list page to process; the main loop increments it.
$page = 1;
// Directory that holds the downloaded list pages.
$dir = dirname(__FILE__) . '/files/downHtml/';
// List-page URL template; {page} is replaced with the page number.
$default_url = 'https://www.ituba.cc/meinvtupian/p{page}.html';

// Connect to Redis (IP and port as configured for the PHP client).
// `new Redis()` always yields an object, so testing the object says nothing about
// the connection. The result of connect() is what has to be checked: depending on
// the phpredis version a failed connect() either returns false or throws
// RedisException, and both cases end the script with the same message.
$redis = new Redis();
try {
	$redis_connected = $redis->connect(REDIS_IP, 6379);
} catch (RedisException $e) {
	$redis_connected = false;
}
if(!$redis_connected) die('redis connect fail');
$redis->auth(REDIS_PASS);

// Point both the master and the slave DB configuration at the se126 database
// before the Db object is created.
$GLOBALS['config']['master']['db_database'] = 'se126';
$GLOBALS['config']['slave']['db_database']  = 'se126';

$model = new Db;

// Make sure the download directory exists (created recursively).
if(!file_exists($dir)) {
	mkdir($dir, 0777, true);
}

/**
 * Parse one downloaded list page and record every entry found in it.
 *
 * The page is converted from GB2312 to UTF-8, the <ul class="ListTitle"> section
 * is extracted, and each <li> is turned into a row with the keys url, thumb,
 * title, category and tags. A row whose url is not yet a field of the Redis hash
 * 'se126:casper_news' is added to the 'casper_news' table, and the new id is
 * stored in that hash under the url. For every complete row, the thumb URL is
 * appended to files/thumb.txt unless a file already exists at the matching path
 * below files/.
 *
 * Uses the global $model (Db) and $redis (Redis) objects. `cut()` and `echo2()`
 * are defined outside this file; cut() presumably returns the text between the
 * two given markers and echo2() presumably prints a labelled message.
 *
 * @param string $filename Path of the saved HTML page; a missing file is silently skipped.
 * @return void
 */
function se_insert_db($filename) {
	if(!file_exists($filename)) return;
	global $model, $redis;

	$html = file_get_contents($filename);
	if($html === false) {
		// Unreadable file: report it instead of parsing `false` as if it were HTML.
		echo2($filename, 'file read error');
		return;
	}

	// iconv() returns false when the conversion fails; without this guard the
	// page would silently produce no rows.
	$html = iconv('gb2312', 'utf-8//IGNORE', $html);
	if($html === false) {
		echo2($filename, 'iconv error');
		return;
	}

	$list = cut('<ul class="ListTitle">', '</ul>', $html);
	preg_match_all('/<li[^>]*>(.*?)<\/li>/is', $list, $matches);

	// Both paths are the same for every item, so build them once.
	$filesRoot = dirname(__FILE__) . '/files';
	$thumbListTxt = $filesRoot . '/thumb.txt';

	foreach ($matches[1] as $item) {
		$tmp = [];
		$tmp['url'] = cut('<a href="', '"', $item);
		$tmp['thumb'] = cut('<img src="', '"', $item);

		$tmp['title'] = cut('</span>', '</a>', $item);
		$tmp['title'] = trim(strip_tags($tmp['title']));

		$tmp['category'] = cut('<em>', '</em>', $item);
		$tmp['category'] = trim(strip_tags($tmp['category']));

		// Tags are the contents of the links inside <p class="ListTag">, comma-joined.
		$tags = cut('<p class="ListTag">', '</p>', $item);
		preg_match_all('/<a[^>]*>(.*?)<\/a>/i', $tags, $match);
		$tmp['tags'] = implode(',', $match[1]);

		if($tmp['url'] && $tmp['thumb'] && $tmp['title']) {
			// The Redis hash maps url => row id and acts as the duplicate check.
			$old_id = $redis->hGet('se126:casper_news', $tmp['url']);
			if(!$old_id) {
				$r = $model->add('casper_news', $tmp);
				if($r) {
					$new_id = $model->insert_id();
					$redis->hSet('se126:casper_news', $tmp['url'], $new_id);

					echo2($new_id .' '.$tmp['url'], 'db insert success');
				}
			} else {
				echo2($old_id .' '.$tmp['url'], 'db old warning');
			}

			// Append the thumb URL to the thumb list file unless a file already
			// exists at the corresponding local path. parse_url() returns false
			// for a seriously malformed URL and omits 'path' when the URL has
			// none, so guard both cases instead of indexing blindly.
			$parts = parse_url($tmp['thumb']);
			$thumbUrlPath = (is_array($parts) && isset($parts['path'])) ? $parts['path'] : '';
			if($thumbUrlPath !== '' && !file_exists($filesRoot . $thumbUrlPath)) {
				file_put_contents($thumbListTxt, $tmp['thumb']."\n", FILE_APPEND);
			}
		} else {
			echo2(print_r($tmp, true), 'tmp error');
		}
	}
}

// Walk the list pages in order. A page whose HTML is already saved under $dir is
// only re-parsed; any other page is first fetched through CasperJS and then
// parsed. The loop stops once the page number exceeds 508.
for (;;) {
	$url = str_replace('{page}', $page, $default_url);

	if($page > 508) {
		echo2($page, 'END');
		break;
	}

	// The saved page is looked up under the URL-encoded page URL inside $dir.
	$filename = $dir . urlencode($url);

	if(!file_exists($filename)) {
		// Not downloaded yet: run the CasperJS script for this URL.
		$shell = "LC_ALL=en_US.UTF-8 /usr/local/bin/casperjs downHtml.js '{$url}' '{$dir}'";
		echo2($shell, 'shell.command');
		$result = shell_exec($shell);
		echo2($result, 'shell.result');
	} else {
		// Already downloaded: skip the fetch.
		echo2($filename, 'EXISTS warning');
	}

	// Advance to the next page, then import the page that was just handled.
	$page++;
	se_insert_db($filename);
}
